Setup

Install and Attach

libinstall <- function(pkg, lib=TRUE) {
    if(!require(pkg, character.only = T))install.packages(pkg)
    if(lib)library(pkg, character.only = T)
}

# https://rstudio.github.io/distill/tables.html

libinstall("rmarkdown")
libinstall("kableExtra")
disp=function(tbl, nhead=0, ntail=0, style=paged_table){
    if(is.null(style))style=function(t){
        kbl(t)%>%
            style()%>%
            return()
    }
    if(isTRUE(getOption('knitr.in.progress'))){
        if(nhead!=0)tbl=head(tbl, n=nhead)
        if(ntail!=0)tbl=tail(tbl, n=ntail)
        return(
            tbl%>%
        style()
        )
    }
    return(tbl)
}

libinstall("tidyverse")
libinstall("glue")
libinstall("readr")
libinstall("plotly")
libinstall("readr")
libinstall("readxl")
libinstall("lubridate")
libinstall("curl")
libinstall("epidata")

Import Data

Import cpsaat Data

if(!curl::has_internet())quit()

# Download cpsaat data
tmp <- tempfile()
curl_download("https://www.bls.gov/cps/cpsaat11.xlsx", destfile = tmp)

# Import cpsaat
cpsaat11 <- read_excel(
        tmp,
        col_names = c(
            "Occupation",
            "Total",
            "Women",
            "White",
            "Black/African American",
            "Asian",
            "Hispanic/Latino"
        ),
        na = "–",
        col_types = c(
            Occupation="text",
            Total="numeric",
            "Women"="numeric",
            "White"="numeric",
            "Black/African American"="numeric",
            "Asian"="numeric",
            "Hispanic/Latino"="numeric"
        ),
        skip = 7
    )%>%
    drop_na(Occupation)
file.remove(tmp)
## [1] TRUE
rm(tmp)

Import EPI Data

Get the data at EPI

Labor_force_participation <- epidata::get_labor_force_participation_rate(by = "gr")

Medianaverage_hourly_wages <- epidata::get_median_and_mean_wages(by = "gr")

Minimum_wage <- epidata::get_minimum_wage()

Clean Data

Clean cpsaat11

cpsaat11%>%disp()

Looks fine.

Clean Labor_force_participation

Labor_force_participation%>%disp()
Participation=Labor_force_participation%>%
    pivot_longer(-c(date), names_to = "Race", values_to = "Participation", values_drop_na = T)%>%
    separate(Race, into = c("Race", "Gender"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 3012 rows [1, 2,
## 3, 4, 7, 10, 13, 14, 15, 16, 19, 22, 25, 26, 27, 28, 31, 34, 37, 38, ...].
Participation=Participation%>%
    filter(grepl("women|men", Race, ignore.case = T))%>%
    mutate(
        Gender=Race,
        Race=NA_character_
    )%>%
    union(
        Participation%>%
            filter(!grepl("women|men", Race, ignore.case = T))
    )
Participation%>%
    filter(!is.na(Race))
## # A tibble: 5,020 x 4
##    date       Race     Gender Participation
##    <date>     <chr>    <chr>          <dbl>
##  1 1978-12-01 all      <NA>           0.634
##  2 1978-12-01 black    <NA>           0.617
##  3 1978-12-01 black    women          0.535
##  4 1978-12-01 black    men            0.718
##  5 1978-12-01 hispanic <NA>           0.633
##  6 1978-12-01 hispanic women          0.47 
##  7 1978-12-01 hispanic men            0.812
##  8 1978-12-01 white    <NA>           0.635
##  9 1978-12-01 white    women          0.499
## 10 1978-12-01 white    men            0.785
## # ... with 5,010 more rows
rm(Labor_force_participation)

Clean Medianaverage_hourly_wages

Medianaverage_hourly_wages%>%disp()
Wages=Medianaverage_hourly_wages%>%
    pivot_longer(-date, names_to = "Race", values_to = "Wage", values_drop_na = T)%>%
    separate(Race, into = c("Race", "Gender", "Summary"), fill = "left")

# Race is in the wrong location sometimes
Wages=Wages%>%
    filter(!grepl("women|men", Gender, ignore.case = T))%>%
    mutate(
        Race=Gender,
        Gender=NA_character_
    )%>%
    union(
        Wages%>%
            filter(grepl("women|men", Gender, ignore.case = T))
    )
# No need to keep the Average and Median split up
Wages=Wages%>%
    pivot_wider(names_from = Summary, values_from = Wage)
rm(Medianaverage_hourly_wages)

Clean Minimum_wage

Minimum_wage%>%disp()
#adjust for inflation to get to common 2019
Minimum_wage=Minimum_wage%>%
    mutate(
        Min2019=priceR::adjust_for_inflation(
            federal_minimum_wage_real_x_2018_dollars,
            2018,
            "US",
            2019
        )
    )
## Retrieving countries data
## Generating URL to request all 297 results
## Retrieving inflation data for US 
## Generating URL to request all 61 results
Minimum_wage=Minimum_wage%>%
    rename(MinCur=federal_minimum_wage_nominal_dollars)%>%
    select(Min2019, MinCur, date)

Fix inconsistant case

Wages=Wages%>%
    rename(
        Date=date,
        Median=median,
        Average=average
    )

Participation=Participation%>%
    rename(Date=date)

Minimum_wage=Minimum_wage%>%
    rename(Date=date)

Graph

Average and Medium Wage over Time by Race and Gender

g=Wages%>%
    ggplot(aes(col=Race, x=Date))+
    geom_line(aes(y=Average))+
    geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
    facet_wrap(~Gender)
ggplotly(g)
g=Wages%>%
    ggplot(aes(col=Race, x=Date))+
    geom_line(aes(y=Median))+
    geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
    facet_wrap(~Gender)
ggplotly(g)

Scatter Plot over Time

g=Wages%>%
    ggplot()+
    geom_point(aes(x=Median, y=Average, col=Race, shape=Gender, frame=Date))+
    ggtitle("Median vs Average Wage per Race and Gender over Time")
## Warning: Ignoring unknown aesthetics: frame
ggplotly(g)